@comment{Journal article (IJWIS 5(3), 2009): KL-divergence-based web users clustering and validation.}
@article {journals/ijwis/KoutsonikolaPVP09,
	title = {A new approach to web users clustering and validation: a divergence-based scheme},
	journal = {International Journal of Web Information Systems},
	volume = {5},
	number = {3},
	year = {2009},
	pages = {348--371},
	abstract = {Purpose -- Web users' clustering is an important mining task since it contributes in identifying usage patterns, a beneficial task for a wide range of applications that rely on the web. The purpose of this paper is to examine the usage of Kullback-Leibler (KL) divergence, an information theoretic distance, as an alternative option for measuring distances in web users clustering. Design/methodology/approach -- KL-divergence is compared with other well-known distance measures and clustering results are evaluated using a criterion function, validity indices, and graphical representations. Furthermore, the impact of noise (i.e. occasional or mistaken page visits) is evaluated, since it is imperative to assess whether a clustering process exhibits tolerance in noisy environments such as the web. Findings -- The proposed KL clustering approach is of similar performance when compared with other distance measures under both synthetic and real data workloads. Moreover, imposing extra noise on real data, the approach shows minimum deterioration among most of the other conventional distance measures. Practical implications -- The experimental results show that a probabilistic measure such as KL-divergence has proven to be quite efficient in noisy environments and thus constitute a good alternative, the web users clustering problem. Originality/value -- This work is inspired by the usage of divergence in clustering of biological data and it is introduced by the authors in the area of web clustering. According to the experimental results presented in this paper, KL-divergence can be considered as a good alternative for measuring distances in noisy environments such as the web.},
	keywords = {Cluster analysis, Internet Data mining, User studies},
	author = {Koutsonikola, Vassiliki A. and Petridou, Sophia G. and Vakali, Athena and Papadimitriou, Georgios I.}
}
@comment{Conference paper (WISE 2008, LNCS 5175): co-clustering of two time-related datasets.}
@inproceedings {conf/wise/KoutsonikolaPVHB08,
	title = {Correlating Time-Related Data Sources with Co-clustering},
	booktitle = {WISE},
	series = {Lecture Notes in Computer Science},
	volume = {5175},
	year = {2008},
	pages = {264--279},
	publisher = {Springer},
	abstract = {A huge amount of data is circulated and collected every day on a regular time basis. Given a pair of such datasets, it might be possible to reveal hidden dependencies between them since the presence of the one dataset elements may influence the elements of the other dataset and vice versa. Furthermore, the impact of these relations may last during a period instead of the time point of their co-occurrence. Mining such relations under those assumptions is a challenging problem. In this paper, we study two time-related datasets whose elements are bilaterally affected over time. We employ a co-clustering approach to identify groups of similar elements on the basis of two distinct criteria: the direction and duration of their impact. The proposed approach is evaluated using time-related news and stock's market real datasets.},
	isbn = {978-3-540-85480-7},
	author = {Koutsonikola, Vassiliki A. and Petridou, Sophia G. and Vakali, Athena and Hacid, Hakim and Benatallah, Boualem},
	editor = {Bailey, James and Maier, David and Schewe, Klaus-Dieter and Thalheim, Bernhard and Wang, Xiaoyang Sean}
}
@comment{Journal article (IEEE TKDE 20(5), 2008): time-aware clustering of web users.}
@article {journals/tkde/PetridouKVP08,
	title = {Time-Aware Web Users' Clustering},
	journal = {IEEE Transactions on Knowledge and Data Engineering},
	volume = {20},
	number = {5},
	year = {2008},
	pages = {653--667},
	author = {Petridou, Sophia G. and Koutsonikola, Vassiliki A. and Vakali, Athena and Papadimitriou, Georgios I.}
}
@comment{Conference paper (ICCSA 2006 part 2, LNCS 3981): KL-divergence with k-means for web users clustering.}
@inproceedings {conf/iccsa/PetridouKVP06,
	title = {A Divergence-Oriented Approach for Web Users Clustering},
	booktitle = {ICCSA (2)},
	series = {Lecture Notes in Computer Science},
	volume = {3981},
	year = {2006},
	pages = {1229--1238},
	publisher = {Springer},
	abstract = {Clustering web users based on their access patterns is a quite significant task in Web Usage Mining. Further to clustering it is important to evaluate the resulted clusters in order to choose the best clustering for a particular framework. This paper examines the usage of Kullback-Leibler divergence, an information theoretic distance, in conjunction with the k-means clustering algorithm. It compares KL-divergence with other well known distance measures (Euclidean, Standardized Euclidean and Manhattan) and evaluates clustering results using both objective function's value and Davies-Bouldin index. Since it is imperative to assess whether the results of a clustering process are susceptible to noise, especially in noisy environments such as Web environment, our approach takes the impact of noise into account. The clusters obtained with KL approach seem to be superior to those obtained with the other distance measures in case our data have been corrupted by noise.},
	isbn = {3-540-34072-6},
	author = {Petridou, Sophia G. and Koutsonikola, Vassiliki A. and Vakali, Athena and Papadimitriou, Georgios I.},
	editor = {Gavrilova, Marina L. and Gervasi, Osvaldo and Kumar, Vipin and Tan, Chih Jeng Kenneth and Taniar, David and Lagan{\`a}, Antonio and Mun, Youngsong and Choo, Hyunseung}
}